# First_Stage_Senate_House_Separated.R
# Goal: Script that performs first stage regressions that disaggregate Senators 
# per million and House members per million to assess how much Senate representation 
# and House representation separately contribute to variation in federal aid per 
# resident
# Author: Shantanu Kamat
# Last update: 2025-08-14

# set up directories

# uncomment the line below and add working directory path between the quotation marks
# master_directory = "ADD WORKING DIRECTORY HERE/"

data_folder = paste0(master_directory,"Data/")
do_folder = paste0(master_directory,"Do/")
results_folder = paste0(master_directory,"Results/")
charts_folder = paste0(master_directory,"Charts/")

# Load libraries
library(tidyverse)
library(janitor)
library(readxl)
library(cdlTools)
library(fuzzyjoin)
library(haven)
library(rvest)
library(Quandl)
library(lubridate)
library(openxlsx)
library(tidystringdist)
library(rio)
library(ropensecretsapi)
library(sandwich)
library(lmtest)
library(estimatr)
library(ivreg)
library(modelsummary)

# load data frame

state_elections_panel_df9 <- read_xlsx(paste0(data_folder,"Processing/state_elections_panel_df_20240709.xlsx"))

# add variables for sens_per_million and house_per_million

state_elections_panel_df10 <- state_elections_panel_df9 %>%
  mutate(sens_per_million = 1000000 * 2/pop, 
         house_per_million = 1000000 * house_seats/pop)

# run first stage regressions with sens_per_million and house_per_million (rather than reps_per_million)

# including control for normal vote
first_stage_sens_house_with_norm_vote <- lm(total_1k_muni_aid_per_resident ~ sens_per_million + house_per_million + norm_vote_top2_share, 
                                            filter(state_elections_panel_df10, year >= 2020, year <= 2022))

summary(first_stage_sens_house_with_norm_vote)

coeftest(first_stage_sens_house_with_norm_vote, vcov = vcovCL, type = "HC1", cluster = ~state)

# first_stage_sens_house_with_norm_vote1 and first_stage_sens_house_without_norm_vote1
# are our preferred specifications for Senators per million and House members
# per million disaggregated.
first_stage_sens_house_with_norm_vote1 <- lm_robust(total_1k_muni_aid_per_resident ~ sens_per_million + house_per_million + norm_vote_top2_share, 
                                                    data = filter(state_elections_panel_df10, year >= 2020, year <= 2022), 
                                                    clusters = state, 
                                                    se_type = "stata")

summary(first_stage_sens_house_with_norm_vote1)

# NOT including control for normal vote

first_stage_sens_house_without_norm_vote <- lm(total_1k_muni_aid_per_resident ~ sens_per_million + house_per_million, 
                                               filter(state_elections_panel_df10, year >= 2020, year <= 2022))

summary(first_stage_sens_house_without_norm_vote)

coeftest(first_stage_sens_house_without_norm_vote, vcov = vcovCL, type = "HC1", cluster = ~state)

# first_stage_sens_house_with_norm_vote1 and first_stage_sens_house_without_norm_vote1
# are our preferred specifications for Senators per million and House members
# per million disaggregated.
first_stage_sens_house_without_norm_vote1 <- lm_robust(total_1k_muni_aid_per_resident ~ sens_per_million + house_per_million, 
                                                       data = filter(state_elections_panel_df10, year >= 2020, year <= 2022), 
                                                       clusters = state, 
                                                       se_type = "stata")

summary(first_stage_sens_house_without_norm_vote1)

# first_stage_sens_house_without_norm_vote1 and first_stage_sens_house_with_norm_vote1
# are used in columns 1 and 2 respectively of Table A.3

gof_map_custom <- data.frame(raw = c("nobs", "r.squared"), 
                             clean = c("Observations", "R-squared"), 
                             fmt = c(0, 3))

modelsummary(list(first_stage_sens_house_without_norm_vote1, first_stage_sens_house_with_norm_vote1), 
             stars = c("*" = 0.1, "**" = 0.05, "***" = 0.01), 
             coef_omit = "(Intercept)", 
             coef_map = c("sens_per_million" = "Sens per Million", 
                          "house_per_million" = "House per Million", 
                          "norm_vote_top2_share" = "Normal Vote"), 
             gof_map = gof_map_custom, 
             add_rows = data.frame(term = "F-Stat", 
                                   m = format(round(first_stage_sens_house_without_norm_vote1$fstatistic[1], 1), nsmall = 0), 
                                   m = format(round(first_stage_sens_house_with_norm_vote1$fstatistic[1], 1), nsmall = 0)), 
             title = "First Stage Regressions with Senators per Million and House Members per Million Disaggregated", 
             notes = "Note")
